rm(list=ls())
setwd("/scratch/cds2083/")

library(foreign)
library(BayesTree)
library(KRLS)
library(arm)
library(e1071)
library(SuperLearner)
library(MatchIt)

# Functions

SL.KRLS <- function(Y, X, newX, family,...){
	if(family$family=="gaussian"){
		fit.krls <- krls(X=X, y=Y)
		pred <- predict.krls(fit.krls, newdata=as.matrix(newX))$fit
		fit <- list(object=fit.krls)
	}
	if(family$family=="binomial"){
		fit.krls <- krls(X=X, y=Y)
		pred <- predict.krls(fit.krls, newdata=as.matrix(newX))$fit
		fit <- list(object=fit.krls)
	}
	out <- list(pred=pred, fit=fit)
	class(out$fit) <- c("SL.KRLS")
	return(out)
}

predict.SL.KRLS <- function(object, newdata, ...){
	pred <- predict(object, 
				newdata=newdata)$fit
	return(pred)
}

 SL.bart <- function (Y, X, newX, family, ntree = 300, sigdf = 3, 
 				sigquant = 0.9, k = 2, power = 2, base = 0.95, 
 				binaryOffset = 0, ndpost = 1000, nskip = 100, ...) 
{
    if (family$family == "gaussian") {
        fitBart <- bart(x.train = X, y.train = Y, x.test = newX, 
            ntree = ntree, sigdf = sigdf, sigquant = sigquant, 
            k = k, power = power, base = base, binaryOffset = binaryOffset, 
            ndpost = ndpost, nskip = nskip, verbose = FALSE)
        pred <- fitBart$yhat.test.mean
    }
    if (family$family == "binomial") {
        fitBart <- bart(x.train = X, y.train = as.factor(Y), 
            x.test = newX, ntree = ntree, sigdf = sigdf, sigquant = sigquant, 
            k = k, power = power, base = base, binaryOffset = binaryOffset, 
            ndpost = ndpost, nskip = nskip, verbose = FALSE)
        pred <- pnorm(apply(fitBart$yhat.test, 2, mean))
    }
    fit <- list(object = fitBart)
    out <- list(pred = pred, fit = fit)
    class(out$fit) <- c("SL.bart")
    return(out)
}



# Simulation
n.sim <- 250

resMat <- data.frame(	truth=rep(NA,n.sim),
						OLS=rep(NA,n.sim),
						matching=rep(NA,n.sim),
						naiveIPW=rep(NA,n.sim),
						learnerIPW=rep(NA,n.sim))
set.seed(678)
for(i in 1:n.sim){

n <- 500



# Key covariate

#x <- exp(rnorm(n))
#sdLim <- 8
#x[x>sd(x)*sdLim] <- max(x[x<=sd(x)*sdLim])

x.sc <- rnorm(n)
x <- x.sc[order(x.sc)]

# Potential outcomes

E.y0 <- x+ .5*(x-min(x))^2
E.y1 <- x+.75*(x-min(x))^2 + .75*(x-min(x))^3
y0 <- E.y0 + rnorm(n,0,sd=5)
y1 <- E.y1 + rnorm(n,0,sd=10)

# Confounded treatment assignment

pZ <- (1+exp(-(-.5+.75*x-.5*(x-mean(x))^2)))^(-1)
Z <- rbinom(n,1,pZ)
Y <- Z*y1 + (1-Z)*y0

# Noise covariates
x1 <- rnorm(n)
x2 <- rnorm(n)
x3 <- rnorm(n)
x4 <- rnorm(n)
x5 <- rnorm(n) 
x6 <- rnorm(n) 
x7 <- rnorm(n) 
x8 <- rnorm(n) 
x9 <- rnorm(n) 
x10 <- rnorm(n) 

# Data

dataUp <- data.frame(Y,Z,x,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10)


# Matching
matchOut <- matchit(Z~x+x1+x2+x3+x4+x5+x6+x7+x8+x9+x10,
					data=dataUp,
					method="nearest",
					distance="mahalanobis",
					replace=T, 
					discard="treat")
m.data <- match.data(matchOut)


# Naive IPW
dataUp$pFit <- predict(glm(Z~x+x1+x2+x3+x4+x5+x6+x7+x8+x9+x10, family="binomial"),
				type="response",
				data=dataUp)
dataUp$IPW <- dataUp$Z + (1-dataUp$Z)*(dataUp$pFit/(1-dataUp$pFit))

# IPW via learner

Xmat <- dataUp[,c("x","x1","x2","x3","x4","x5","x6","x7","x8","x9","x10")]
Zvar <- dataUp[,"Z"]

	nu.admis <- min(min(c(mean(Zvar), 1-mean(Zvar))), .5)
	SL.svm.admis <- function(...,nu=nu.admis){
		SL.svm(...,nu=nu)
	}

	fitLearner <- 	SuperLearner(	Y=Zvar,
									X=Xmat,
						SL.library = c(	"SL.glm", 
										"SL.KRLS", 
										"SL.bart",
										"SL.bayesglm",
										"SL.svm.admis"
										),
						family=binomial(),
						method="method.NNLS",
						verbose=TRUE,
						cvControl=list(V=10))

dataUp$pLearner <- fitLearner$SL.predict
dataUp$learnerIPW <- dataUp$Z + (1-dataUp$Z)*(dataUp$pLearner/(1-dataUp$pLearner))

# Winsorizing for negative weights
if(sum(dataUp$learnerIPW<0)>0){
dataUp$learnerIPW[dataUp$learnerIPW<0] <- min(dataUp$learnerIPW[dataUp$learnerIPW>0])
}

print(sum(is.na(dataUp$IPW)))
print(sum(is.na(dataUp$learnerIPW)))
if(sum(is.na(dataUp$IPW))==0&sum(is.na(dataUp$learnerIPW))==0){
# Results
bTrue 	<- mean((y1-y0)[Z==1])
bOLS 	<- coef(lm(Y~Z+x+x1+x2+x3+x4+x5+x6+x7+x8+x9+x10,
				data=dataUp))["Z"]
bMatch 	<- coef(lm(	Y~Z, 
					data=m.data, 
					weights=m.data$weights))["Z"]
bIPW 	<- coef(lm(Y~Z,
				weights=dataUp$IPW,
				data=dataUp))["Z"]
bLearnerIPW <- coef(lm(Y~Z,
				weights= dataUp$learnerIPW,
				data=dataUp))["Z"]

#i <- 1				
resMat[i,] <- c(bTrue, bOLS, bMatch, bIPW, bLearnerIPW)
write.csv(resMat,
			file="sim10.csv",
			row.names=F)
}

}



